{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/swap-253/Time-Series-And-Anomaly-Detection/blob/main/Anomaly_detection_Using_LSTM_AutoEncoders.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: plotly in /Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages (5.6.0)\r\n",
      "Requirement already satisfied: tenacity>=6.2.0 in /Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages (from plotly) (8.0.1)\r\n",
      "Requirement already satisfied: six in /Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages (from plotly) (1.16.0)\r\n"
     ]
    }
   ],
   "source": [
    "!pip install plotly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "PbVkw3i7mg0Z"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/Users/liguanghui/opt/anaconda3/envs/py37-pytorch/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    },
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'tensorflow_datasets'",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mModuleNotFoundError\u001B[0m                       Traceback (most recent call last)",
      "\u001B[0;32m/var/folders/x5/46xxxny51s7dgh2f4k1_ys280000gn/T/ipykernel_61781/3685964907.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m     10\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0mpandas\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mplotting\u001B[0m \u001B[0;32mimport\u001B[0m \u001B[0mregister_matplotlib_converters\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     11\u001B[0m \u001B[0;32mimport\u001B[0m \u001B[0mplotly\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgraph_objects\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mgo\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 12\u001B[0;31m \u001B[0;32mimport\u001B[0m \u001B[0mtensorflow_datasets\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mtfds\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m     13\u001B[0m \u001B[0;32mimport\u001B[0m \u001B[0mtensorflow_probability\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mtfp\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     14\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
      "\u001B[0;31mModuleNotFoundError\u001B[0m: No module named 'tensorflow_datasets'"
     ]
    }
   ],
   "source": [
    "#importing suitable libraries\n",
    "import numpy as np\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.enable_v2_behavior()\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from pylab import rcParams\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import rc\n",
    "from pandas.plotting import register_matplotlib_converters\n",
    "import plotly.graph_objects as go\n",
    "import tensorflow_datasets as tfds\n",
    "import tensorflow_probability as tfp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "swXNFaQ5Csoo"
   },
   "outputs": [],
   "source": [
    "#setting sns and plotly plots\n",
    "sns.set(style='whitegrid', palette='muted', font_scale=1.5)\n",
    "rcParams['figure.figsize'] = 10,5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 72,
     "resources": {
      "http://localhost:8080/nbextensions/google.colab/files.js": {
       "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5kIHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCkgewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxlRGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwogICAgICBwZXJjZW50LnRleHRDb250ZW50ID0KICAgICAgICAgIGAke01hdGgucm91bmQoKHBvc2l0aW9uIC8gZmlsZURhdGEuYnl0ZUxlbmd0aCkgKiAxMDApfSUgZG9uZWA7CiAgICB9CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK",
       "headers": [
        [
         "content-type",
         "application/javascript"
        ]
       ],
       "ok": true,
       "status": 200,
       "status_text": ""
      }
     }
    },
    "id": "x-Yd0mc6zVb0",
    "outputId": "6b803744-ea48-4b51-d5b2-88bea7d5138c"
   },
   "outputs": [],
   "source": [
    "from google.colab import files\n",
    "uploaded = files.upload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "id": "dhcfLdSHm6p-",
    "outputId": "301011b2-e86b-4adb-d953-1cdd47f1d1e6"
   },
   "outputs": [],
   "source": [
    "#reading the dataframe\n",
    "df = pd.read_csv('JNJ.txt')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "-Pfl0g3QzvSy"
   },
   "outputs": [],
   "source": [
    "#selecting useful columns from the dataframe\n",
    "df = df[['Date', 'Close']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "id": "NvTljh07WMAE",
    "outputId": "f0923f62-fd01-4058-b097-c651982bbc1d"
   },
   "outputs": [],
   "source": [
    "#just to see that the last date on which data was collected is 3 September 2020\n",
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "id": "4oQdQpuP2Hb2",
    "outputId": "b7fdd40b-7a6a-4292-e97a-b222b6a10c72"
   },
   "outputs": [],
   "source": [
    "#bringing date in the required format\n",
    "df['Date'] = pd.to_datetime(df['Date'])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 542
    },
    "id": "sGH7w4nKikFp",
    "outputId": "43270952-50a3-4956-a0f5-6b6064b8d184"
   },
   "outputs": [],
   "source": [
    "#A scatterplot of date vs Closing Price of Johnson and Johnson stock \n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Scatter(x=df['Date'], y=df['Close'], name='Close price'))\n",
    "fig.update_layout(showlegend=True, title='Johnson and Johnson Stock Price 1985-2020')\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9Cz4C73TxG_Y"
   },
   "outputs": [],
   "source": [
    "#choosing the training and test sets specified by date\n",
    "train, test = df.loc[df['Date'] <= '2013-09-03'], df.loc[df['Date'] > '2013-09-03']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "2Qpy9IyKTmju",
    "outputId": "ae87e783-11cd-44c4-f7ae-171396b5d0fa"
   },
   "outputs": [],
   "source": [
    "len(train),len(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "id": "rRtbidKn2sdr",
    "outputId": "d4d25d53-8001-4ad4-8c68-f7f550112f48"
   },
   "outputs": [],
   "source": [
    "train.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "hkxx7ETZ8oF8",
    "outputId": "05b40846-527d-4b56-ad98-331db5e3b412"
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "scaler = StandardScaler()\n",
    "#for scaling data is fitted on training set and transformed for both training and test sets\n",
    "scaler = scaler.fit(train[['Close']])\n",
    "train['Close'] = scaler.transform(train[['Close']])\n",
    "test['Close'] = scaler.transform(test[['Close']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rp89-jBZpskM"
   },
   "outputs": [],
   "source": [
    "#A function to create datasets taking 30 closing prices at a time to predict the next closing price\n",
    "def create_dataset(X, y, time_steps=30):\n",
    "    Xs, ys = [], []\n",
    "    for i in range(len(X) - time_steps):\n",
    "        v = X.iloc[i:(i + time_steps)].values\n",
    "        Xs.append(v)        \n",
    "        ys.append(y.iloc[i + time_steps])\n",
    "    return np.array(Xs), np.array(ys)\n",
    "X_train, y_train = create_dataset(train[['Close']], train['Close'])\n",
    "X_test, y_test = create_dataset(test[['Close']], test['Close'])    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "UFpUz7OFps8J",
    "outputId": "99d67bb7-0e57-4395-e7d4-f9c80a798104"
   },
   "outputs": [],
   "source": [
    "print(X_train.shape,y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "da55zoY1qSQ0",
    "outputId": "c1bbef01-34e5-4ed7-9326-95ce1309d287"
   },
   "outputs": [],
   "source": [
    "#This is an LSTM A.E Model which tries to reconstruct the data considering it as sequential and the impermissible errors above a \n",
    "# certain threshold would be considered as anomalies \n",
    "model = tf.keras.Sequential()\n",
    "model.add(tf.keras.layers.LSTM(\n",
    "    units=128, \n",
    "    input_shape=(X_train.shape[1], X_train.shape[2])\n",
    "))\n",
    "model.add(tf.keras.layers.Dropout(rate=0.2))\n",
    "model.add(tf.keras.layers.RepeatVector(n=X_train.shape[1]))\n",
    "model.add(tf.keras.layers.LSTM(units=128, return_sequences=True))\n",
    "model.add(tf.keras.layers.Dropout(rate=0.2))\n",
    "model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=X_train.shape[2])))\n",
    "model.compile(loss='mae', optimizer='adam')\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Neuixuq7FXPh"
   },
   "source": [
    "The RepeatVector layer simply repeats the input n times. Adding return_sequences=True in LSTM layer makes it return the sequence.\n",
    "\n",
    "Finally, the TimeDistributed layer creates a vector with a length of the number of outputs from the previous layer. This LSTM Autoencoder is ready for training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "HhUPMxZtrFTS",
    "outputId": "fc070c66-3d41-4c76-a513-d4edd286661d"
   },
   "outputs": [],
   "source": [
    "#fitting of LSTM A.E Model on training set\n",
    "history = model.fit(X_train, y_train,epochs=7,batch_size=32,validation_split=0.1,shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 334
    },
    "id": "2GYE03C1TUv0",
    "outputId": "11ead21a-a981-4e8f-f614-c0a1252c0203"
   },
   "outputs": [],
   "source": [
    "#plotting the losses\n",
    "plt.plot(history.history['loss'], label='train')\n",
    "plt.plot(history.history['val_loss'], label='test')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "eS1MkZiN43eE",
    "outputId": "7678d546-b32c-4ead-c99b-7b34dd4b7a80"
   },
   "outputs": [],
   "source": [
    "#evaluation of model's prediction on test data\n",
    "model.evaluate(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 369
    },
    "id": "DgOzzuc_xq8i",
    "outputId": "ad3cc798-9ae9-463c-9fe1-7e5fea96cee1"
   },
   "outputs": [],
   "source": [
    "X_train_pred = model.predict(X_train)\n",
    "train_mae_loss = np.mean(np.abs(X_train_pred - X_train), axis=1)\n",
    "plt.hist(train_mae_loss, bins=50)\n",
    "#plotting a histogram of training loss with number of samples\n",
    "plt.xlabel('Train MAE loss')\n",
    "plt.ylabel('Number of Samples');\n",
    "#threshold may be considered as the highest value of error in training data reconstruction \n",
    "threshold = np.max(train_mae_loss)\n",
    "print(f'Reconstruction error threshold: {threshold}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 352
    },
    "id": "G0IRFw03cP2V",
    "outputId": "852aaba0-5b9a-42c2-8c52-8119f7950cdc"
   },
   "outputs": [],
   "source": [
    "#plotting a histogram of testing data loss with number of samples\n",
    "X_test_pred = model.predict(X_test, verbose=0)\n",
    "test_mae_loss = np.mean(np.abs(X_test_pred-X_test), axis=1)\n",
    "plt.hist(test_mae_loss, bins=50)\n",
    "plt.xlabel('Test MAE loss')\n",
    "plt.ylabel('Number of samples');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "U-87Uma2uEg6"
   },
   "outputs": [],
   "source": [
    "TIME_STEPS=30\n",
    "#creating a new dataframe for test data which indicates where its an anomaly or not\n",
    "test_score_df = pd.DataFrame(test[TIME_STEPS:])\n",
    "test_score_df['loss'] = test_mae_loss\n",
    "test_score_df['threshold'] = threshold\n",
    "test_score_df['anomaly'] = test_score_df['loss'] > test_score_df['threshold']\n",
    "test_score_df['Close'] = test[TIME_STEPS:]['Close']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 359
    },
    "id": "wy1c39sc6IWH",
    "outputId": "d788c6c2-ad67-43bf-f78d-d4f4e59b24af"
   },
   "outputs": [],
   "source": [
    "test_score_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 542
    },
    "id": "qWiYvGmkv_Pb",
    "outputId": "13eab531-588c-4b4d-da9b-44543fd8c899"
   },
   "outputs": [],
   "source": [
    "#A plotted figure of test loss vs threshold\n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Scatter(x=test_score_df['Date'], y=test_score_df['loss'], name='Test loss'))\n",
    "fig.add_trace(go.Scatter(x=test_score_df['Date'], y=test_score_df['threshold'], name='Threshold'))\n",
    "fig.update_layout(showlegend=True, title='Test loss vs. Threshold')\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 204
    },
    "id": "EkVxvoAt0Egc",
    "outputId": "d5216e26-c4a3-4434-ef5f-ededcfa50afd"
   },
   "outputs": [],
   "source": [
    "#anomalies is a dataframe consisting of anomalous test data\n",
    "anomalies = test_score_df[test_score_df.anomaly == True]\n",
    "anomalies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "DI4ZG3Ay6llz",
    "outputId": "4c023cc4-b0f5-428d-bfe2-bac707f70ca7"
   },
   "outputs": [],
   "source": [
    "#below is the number of anomalous examples\n",
    "anomalies.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 434
    },
    "id": "b9pidLApzimf",
    "outputId": "7d89bcd0-430a-41cc-87de-9c692a59aade"
   },
   "outputs": [],
   "source": [
    "#plotting the test data displaying anomalous data with red dots\n",
    "plt.plot(test[TIME_STEPS:].index, scaler.inverse_transform(test[TIME_STEPS:].Close), label='close price');\n",
    "sns.scatterplot(anomalies.index,scaler.inverse_transform(anomalies.Close),color=sns.color_palette()[3],label='anomaly')\n",
    "plt.xticks(rotation=25)\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 542
    },
    "id": "73hl1uWx7egN",
    "outputId": "29d18d91-2aea-4e26-f494-94436ed5ae52"
   },
   "outputs": [],
   "source": [
    "#A plot displaying closing prices Detected Anomalies with the Date  \n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Scatter(x=test_score_df['Date'], y=scaler.inverse_transform(test_score_df['Close']), name='Close price'))\n",
    "fig.add_trace(go.Scatter(x=anomalies['Date'], y=scaler.inverse_transform(anomalies['Close']), mode='markers', name='Anomaly'))\n",
    "fig.update_layout(showlegend=True, title='Detected anomalies')\n",
    "fig.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "vTymVsL77--3"
   },
   "source": [
    "The model found that some low price anomalies in March and high price anomalies in April.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "muiVUGVk7z5u"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "include_colab_link": true,
   "name": "Anomaly-detection- Using LSTM-AutoEncoders.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}